<?php
/**
 * Created by PhpStorm.2017.1
 * User: Daniel<danieltang521@gmail.com>
 * Date: 2020/1/7
 * Time: 14:39
 * Scraper script: walks the Meituan training-school listing pages
 * (xuexipeixun/c20287) of every configured city and exports the stores found.
 * Copyright: Daniel Shanghai China. All rights reserved.
 */

require_once "excel.class.php";
$jsonCityStr = require_once "cityList.php";

/**
 * Resolve JSON-style escape sequences (such as \uXXXX or \") inside a string.
 *
 * The input is spliced as-is into a one-field JSON document and handed to
 * json_decode(), so every escape that JSON understands gets decoded.
 *
 * @param string $unicode_str Text usable as the inside of a JSON string literal.
 * @return string The decoded text, or '' when the decode result is empty
 *                (e.g. the input made the wrapper document invalid JSON).
 */
function unicodeDecode($unicode_str){
    $decoded = json_decode('{"str":"' . $unicode_str . '"}', true);

    return empty($decoded) ? '' : $decoded['str'];
}

/**
 * Export the collected store records through the excel helper.
 *
 * Assembles the payload passed to excel::saveLocal(): the table name, the
 * header row and one positional row per store. Keys of $ss are kept as the
 * row keys. Note that the last column (header '备注') carries the storeId.
 *
 * @param array  $ss       Store records with storeName, phone, address, city and storeId keys.
 * @param string $fileName Used as the table name of the export.
 * @return void
 */
function exportData($ss,$fileName){
    $rows = [];
    if (!empty($ss)) {
        foreach ($ss as $key => $store) {
            $rows[$key] = [
                0 => $store['storeName'],
                1 => $store['phone'],
                2 => $store['address'],
                3 => $store['city'],
                4 => $store['storeId'],
            ];
        }
    }

    $payload = [
        'tableName' => $fileName,                                  // table name
        'rowName'   => ['校区名字', '电话','地址','城市','备注'],    // header row
        'value'     => $rows,                                      // data rows
    ];

    (new excel())->saveLocal($payload);
}

/**
 * Pick a random browser User-Agent string for an outgoing request.
 *
 * @return string One entry of the built-in User-Agent pool.
 */
function getAgent(){
    $agent = array(
        'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
        'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
        // The closing parenthesis was missing here, producing a malformed header value.
        'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
        'Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
        'Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)',
        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134',
    );

    // array_rand() derives the index from the list itself, so adding or removing
    // an entry can no longer leave a stale hard-coded upper bound behind.
    return $agent[array_rand($agent)];
}

/**
 * Fetch a page with cURL using a randomized User-Agent and a browser-like cookie.
 *
 * @param string   $url     Address to request; it is also sent as the Referer.
 * @param int|bool $exec    Truthy: return the raw curl_exec() result (the body string,
 *                          or false on failure). Falsy (default): return a JSON string,
 *                          either {"status":"ok","content":<body>,"msg":<curl_getinfo()>}
 *                          or {"status":"fail","msg":<curl error text>}.
 * @param int      $timeout Maximum number of seconds the transfer may take. The default
 *                          of 0 keeps the previous behaviour of never timing out.
 * @return string|false See $exec.
 */
function curl_get($url,$exec=0,$timeout=0)
{
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_NOBODY, 0);
    curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
    curl_setopt($ch,CURLOPT_URL,$url);
    curl_setopt($ch, CURLOPT_HTTPHEADER, array("Content-Type: application/json; charset=utf-8;"));
    // The uuid timestamp and the "ci" value change per request; the rest of the cookie is fixed.
    curl_setopt($ch,CURLOPT_COOKIE,'uuid=33d30956f7e34b47b7ed.'.time().'.1.0.0; _lx_utm=utm_source%3DBaidu%26utm_medium%3Dorganic; _lxsdk_cuid=16f7ea149fe2b-00554a71f6de2f-6701b35-1fa400-16f7ea14a024c; ci='.rand(1,99).'; rvct=357%2C65%2C490%2C151%2C359%2C42%2C1%2C55%2C10%2C80%2C1164; _lxsdk_s=16f82f0f51a-032-cbc-d2%7C%7C63');

    curl_setopt($ch, CURLOPT_REFERER, $url);       // send the target URL as its own Referer
    curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);   // maximum transfer time in seconds (0 = unlimited)
    curl_setopt($ch,CURLOPT_USERAGENT,getAgent());
    curl_setopt($ch,CURLOPT_RETURNTRANSFER,1);     // return the body instead of printing it

    $execres = curl_exec($ch);

    if($exec){
        curl_close($ch);
        return $execres;
    }

    // Error details and transfer info must be read before the handle is closed.
    if(curl_errno($ch))
    {
        $result = json_encode(array('status'=>'fail','msg'=>curl_error($ch)));
    }
    else
    {
        $result = json_encode(array('status'=>'ok','content'=>$execres,'msg'=>curl_getinfo($ch)));
    }

    // Previously this call sat after the return statements and never ran.
    curl_close($ch);

    return $result;
}


/**
 * Scrape the details of a single store page.
 *
 * curl_get() is called in its default mode, so the text being searched is the
 * JSON envelope it returns (the HTML is JSON-escaped inside it). The captured
 * push() argument is therefore passed through unicodeDecode() to undo that
 * escaping before it is parsed as JSON.
 *
 * @param string $url Store detail page URL.
 * @return array Keys storeId, storeName, address, phone and city. Every value is
 *               null when the page did not contain the expected widget data
 *               (request failed, blocked, or markup changed).
 */
function getStoreDetail($url){
    $html = curl_get($url);
    preg_match_all('/window\.__LEGO_WIDGETS_FALLBACK__\.push\((.*)\)/isU',$html,$jsonStr);

    // The third push() call is the one read for shop data — NOTE(review): this
    // position is presumably tied to the current page layout; verify if results go empty.
    $data = null;
    if (isset($jsonStr[1][2])) {
        $data = json_decode(unicodeDecode($jsonStr[1][2]),true);
    }

    // isset() on the nested path is safe even when $data is null or a level is
    // missing, which avoids the undefined-index warnings the direct reads produced.
    $storeInfo = array();
    $storeInfo['storeId'] = isset($data['params']['shopId']) ? $data['params']['shopId'] : null;
    $storeInfo['storeName'] = isset($data['params']['mapInfo']['shopName']) ? $data['params']['mapInfo']['shopName'] : null;
    $storeInfo['address'] = isset($data['params']['shopInfo']['address']) ? $data['params']['shopInfo']['address'] : null;
    $storeInfo['phone'] = isset($data['params']['shopInfo']['phoneNo']) ? $data['params']['shopInfo']['phoneNo'] : null;
    $storeInfo['city'] = isset($data['params']['mapInfo']['cityName']) ? $data['params']['mapInfo']['cityName'] : null;

    return $storeInfo;
}

/**
 * Collect the store detail links found on one listing page.
 *
 * The page is fetched as a raw body (curl_get() with its second argument set)
 * and every item-title anchor is matched; the captured value is the href
 * without its leading "//".
 *
 * @param string $url Listing page URL, e.g. https://hanzhong.meituan.com/xuexipeixun/c20287/
 * @return array Captured link targets in page order; empty when nothing matched.
 */
function getCityList($url){
    $anchorPattern = '/<div class="list-item-desc-top"><a href=\"\/\/(.*)\" target="_blank" class="item-title">/isU';

    $pageBody = curl_get($url, 1);
    preg_match_all($anchorPattern, $pageBody, $hits);

    return $hits[1];
}

/**
 * Turn the city catalogue JSON into a flat list of listing-page URLs.
 *
 * The decoded JSON is iterated as a list of groups; element 1 of each group is
 * read as a list of cities, each providing 'name' and 'acronym'. The acronym
 * becomes the sub-domain of the listing URL.
 *
 * @param string $jsonCityStr City catalogue as a JSON string.
 * @return array List of ['name' => ..., 'url' => ...] entries, in catalogue order.
 */
function getCollectionCityListUrl($jsonCityStr){
    $groups = json_decode($jsonCityStr, true);
    $targets = array();

    foreach ($groups as $group) {
        foreach ($group[1] as $entry) {
            $listingUrl = "https://" . $entry['acronym'] . ".meituan.com/xuexipeixun/c20287/";
            $targets[] = array(
                "name" => $entry['name'],
                "url"  => $listingUrl,
            );
        }
    }

    return $targets;
}


/**
 * Scrape every listing page of one city and persist the stores found.
 *
 * Pages are requested as <url>pn1/, <url>pn2/, ... until a page yields no store
 * links. Each store that resolves to a name is appended to logs/log, gathered
 * into logs/<name>-音乐类培训校区采集.txt and finally handed to exportData().
 * Store URLs whose details could not be read are written to logs/log and skipped.
 *
 * @param array $cityUrl Entry with 'name' (city name) and 'url' (listing base URL).
 * @return void
 */
function saveData($cityUrl){
    $fileName = $cityUrl['name']."-音乐类培训校区采集";

    // Make sure the log directory exists; without it every write below fails
    // with a warning and the text output is lost.
    if (!is_dir("logs")) {
        mkdir("logs", 0777, true);
    }

    $data = array();
    $page = 1;
    $str = "";
    do{
        $url = $cityUrl["url"]."pn".$page."/";
        $storeUrlList = getCityList($url);
        foreach ($storeUrlList as $k =>$v){
            $info = getStoreDetail("https://".$v);
            if (empty($info['storeName'])){
                // Record the URL that yielded nothing so it can be retried by hand.
                file_put_contents("logs/log","https://".$v.PHP_EOL,FILE_APPEND);
                continue;
            }
            $data[] = $info;
            $line = $info['storeName'].','.$info['phone'].','.$info['address'].PHP_EOL;
            $str .= $line;
            file_put_contents("logs/log",$line,FILE_APPEND);
            sleep(1); // throttle between store requests
        }
        $page++;
    }while(!empty($storeUrlList)); // an empty page marks the end of the listing
    file_put_contents("logs/".$fileName.".txt",$str);
    exportData($data,$fileName);
}

// Entry point: scrape every city from the catalogue, one after another.
$cityUrlList = getCollectionCityListUrl($jsonCityStr);
foreach ($cityUrlList as $cityUrl) {
    saveData($cityUrl);
    sleep(1); // short pause before moving on to the next city
}

//测试逻辑
//saveData(array("name"=>"汉中","url"=>"https://hanzhong.meituan.com/xuexipeixun/c20287/"));






